################################################
# Clean replication code for Tandeo 2022: Analyzing survey data
# Created AJH
# Created 4/27/2022
# Last edited 6/10/2022
################################################
library(tidyverse)
library(srvyr)
library(survey)
library(stargazer)
library(sf)
library(gridExtra)

# note- all data uses two sets of weights:
    # design weights: (in-person only), used to correct for oversampling of certain types of neighborhoods in in-person survey
    # post stratification weights: (both in person and online) reweight the survey population to match demographic distribution of the five alcaldias together

  # nice tutorial on "surveyr" (weights in r)
  # https://federicovegetti.github.io/teaching/heidelberg_2018/lab/sst_lab_day2.html
  
  # load in data
  load("citizen_public_service_survey/pooled_weighted_five.Rdata")
  # join with rationing data
  load("chapter_5_rewarding_responsiveness/additional_data/tandeo_colonias.Rdata") 
  # note that there are about 200 observations in which the person answered that their colonia was "other". For now, just assign their tandeo status as NA
  dat <- left_join(dat, tandeo_colonias[,c("cve_col", "tandeo_2021")], by ="cve_col")

  # set design matrix with weights
  dat_we <- dat %>% 
    as_survey(weights = c("design_weight", "ps_weights"))
  
  ########
  # First, validate the relationship between IWS and Tandeo 
  ########
  dat %>% 
    as_survey(weights = c("design_weight", "ps_weights")) %>% 
    group_by(tandeo_2021) %>% 
    summarize(water_hours= survey_mean(water_hours_weekly, na.rm=TRUE)) 
  #  Unrationed places have, on average, 96 hours of water a week, while rationed places have 63
  
  # Make a figure of water hours 
  dat$water_supply_cat <- case_when(
    dat$water_hours_weekly <5~ "<5",
    dat$water_hours_weekly >5 & dat$water_hours_weekly <= 15 ~ "5-15",
    dat$water_hours_weekly >15 & dat$water_hours_weekly <= 30 ~ "15-30",
    dat$water_hours_weekly >30 & dat$water_hours_weekly <= 45  ~ "30-45",
    dat$water_hours_weekly >45 & dat$water_hours_weekly <= 60  ~ "45-60",
    dat$water_hours_weekly >60 & dat$water_hours_weekly <= 75  ~ "60-75",
    dat$water_hours_weekly >75 & dat$water_hours_weekly <= 90  ~ "75-90",
    dat$water_hours_weekly >90 & dat$water_hours_weekly <= 105  ~ "90-105",
    dat$water_hours_weekly >105 & dat$water_hours_weekly <= 120  ~ "105-120",
    dat$water_hours_weekly >120 & dat$water_hours_weekly <= 135  ~ "120-135",
    dat$water_hours_weekly >135 & dat$water_hours_weekly <= 150  ~ "135-150",
    dat$water_hours_weekly >150 & dat$water_hours_weekly < 168 ~ "150-167",
    dat$water_hours_weekly == 168~ "Continuous"
    )
  dat$water_supply_cat = factor(dat$water_supply_cat, levels = c("5-15", "15-30","30-45","45-60","60-75","75-90",
                                                                 "90-105", "105-120", "120-135","135-150", "150-167","Continuous" ))
    
    dat$tandeo_2021 <- factor(dat$tandeo_2021, levels= c(1,0), labels= c("On Rationing Lists", "Not on Rationing Lists"))
    
    p <- dat %>% 
    as_survey(weights = c("design_weight", "ps_weights")) %>% 
    group_by(tandeo_2021, water_supply_cat) %>% 
    summarize(n= survey_total()) %>% 
    filter(!is.na(tandeo_2021)) %>% 
    ggplot(data=.) +
    geom_bar(aes(x= water_supply_cat, y = n),  position ="dodge", stat= "identity") +
    theme_bw()+
    labs(title="Weekly Water Supply (Self-Reported)", fill= "Rationing List", y = "Count", x="Hours/Week")+
      theme(legend.position ="bottom" )+
      facet_wrap(~tandeo_2021)+
      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
    ggsave(p, filename = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/water_hours.jpg", width = 8)
    
    
  #  What percent of households are intermittent?
    dat %>% 
      filter(!is.na(intermittent)) %>% 
      as_survey(weights = c("design_weight", "ps_weights")) %>% 
      group_by(intermittent) %>% 
      summarize(outcome= survey_total()) %>% 
      mutate(p = outcome/sum(outcome))
  
 
  ########
  # Places with intermittent water supply have more outages, make more requests and get trucks more frequently 
  ########

  # For appendix: Do this in regression form, using intermittency and water hours
  # outage
  m1_1 <- svyglm(formula = outage_yesno ~ intermittent, design= dat_we, family ="quasibinomial")
  m1_2 <- svyglm(formula = outage_yesno ~ water_service, design= dat_we, family ="quasibinomial")
  m1_3 <- svyglm(formula = outage_yesno ~ water_hours_weekly, design= dat_we, family ="quasibinomial")
  stargazer(m1_1, m1_2, m1_3, type = "text", 
            dep.var.caption = "Experienced water outage",
            dep.var.labels.include = FALSE,
            covariate.labels = c("Intermittent",  "Daily partial", "Less than daily", "Water hours/week", "Constant"),
                                 out = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_spring_2022/appendix_outages.tex",
            title = "Households with intermittent water are more likely to have experienced water outages",
            label = "tab:appendix_outages"
            )
  
  
  # appeal
  m1_4 <- svyglm(formula = claim_water ~ intermittent, design= dat_we, family ="quasibinomial")
  m1_5 <- svyglm(formula = claim_water ~ water_service, design= dat_we, family ="quasibinomial")
  m1_6 <- svyglm(formula = claim_water ~ water_hours_weekly, design= dat_we, family ="quasibinomial")
  stargazer(m1_4, m1_5, m1_6, type ="text",
            dep.var.caption = "Has made water-related appeal",
            dep.var.labels.include = FALSE,
            covariate.labels = c("Intermittent",  "Daily partial", "Less than daily", "Water hours/week", "Constant"),
            out = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_spring_2022/appendix_appeals.tex",
            title = "Households with intermittent water are more likely to have made a water-related appeal",
            label= "tab:appendix_appeal"
             )
  #trucks
  m1_7 <- svyglm(formula = truck_any ~ intermittent, design= dat_we, family ="quasibinomial")
  m1_8 <- svyglm(formula = truck_any ~ water_service, design= dat_we, family ="quasibinomial")
  m1_9 <- svyglm(formula = truck_any ~ water_hours_weekly, design= dat_we, family ="quasibinomial")
  stargazer(m1_7, m1_8, m1_9, type = "text", 
            dep.var.caption = "Has received water tanker trucks",
            dep.var.labels.include = FALSE,
            covariate.labels = c( "Intermittent",  "Daily partial", "Less than daily", "Water hours/week", "Constant"),
            title = "Households with intermittent water are more likely to have received a water tanker truck",
  out = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_spring_2022/appendix_trucks.tex",
  label="tab:appendix_trucks"
  )

  # Make a coefficient plot  for the main paper
  coefs_phase1 <- data_frame(outcome = c("Experiencing\n a water outage", "Making a\n Water-Related\n Claim","Receiving \n a Water Tanker Truck"), 
                            intermittency_coef = c(coef(m1_1)[2], coef(m1_4)[2],coef(m1_7)[2]),
                            intermittency_lower =c(coef(m1_1)[2]- (1.96*0.2352), coef(m1_4)[2]-(1.96 *0.3394),coef(m1_7)[2]- (1.96*0.2583)),
                            intermittency_upper =c(coef(m1_1)[2]+ (1.96*0.2352), coef(m1_4)[2]+(1.96 *0.3394),coef(m1_7)[2]+ (1.96*0.2583)))
  
  pz <- ggplot(data = coefs_phase1) + 
    geom_point(aes(x= outcome, y = intermittency_coef)) + 
    geom_errorbar(aes(x = outcome, ymin = intermittency_lower, ymax = intermittency_upper))+ 
    theme_classic() + 
    geom_hline(yintercept = 0, color = "blue", linetype = "dashed")+
    labs(x = "", y = "Coefficient on Intermittency", 
         title = "IWS Makes Residents More Likely \n to Experience Outages Make Claims, \n and Receive a Tanker Truck", 
         caption = "95% Confidence Interval")+
    theme(axis.title = element_text(size =15), 
          axis.text = element_text(size = 15))
  ggsave(pz, file = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/appeals_trucks.jpg", width = 7)
  
 
  
  
# Does responsiveness shape political evaluations?
  # some summary stats on evaluations of alcalde
    dat$elec_alc_eval <- factor(dat$elec_alc_eval, 
                                levels = c("Muy Malo", "Malo", "Bueno","Muy bueno", "No sé"),
                                labels = c("Very poor", "Poor", "Good", "Very good", "Don't know"))
    dat %>% 
    as_survey(weights = c("design_weight", "ps_weights")) %>% 
    summarize(eval= survey_mean(as.numeric(alc_eval_num), na.rm=TRUE))
   
    evals <- dat %>% 
    as_survey(weights = c("design_weight", "ps_weights")) %>% 
    group_by(alcaldia, elec_alc_eval) %>% 
    summarize(n= survey_total()) 
    
    
    p <- ggplot(evals[!is.na(evals$elec_alc_eval),]) + 
      geom_bar( aes(x= elec_alc_eval, y =n ), stat= "identity")+
      facet_wrap(~alcaldia, nrow=1)+
      theme_bw()+
      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))+ 
      labs(x= "Evaluation of Mayor", y = "")+
      theme(strip.text.x = element_text(size = 12))
    
  ggsave(p, filename = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/mayor_eval.jpg", width = 10)
    

  # First, make a plot showing the simple correlation coefficients with error bars. This goes in the main paper. 
  m2_1 <- svyglm(alc_eval_num ~ intermittent, design = dat_we, family ="gaussian") # in general, places that have intermittent water evaluate their mayor less favorably 
  m2_2 <- svyglm(alc_eval_num ~  claim_water, design= dat_we, family ="gaussian") # having made a claim is negatively associated with the evaluation of the mayor
  m2_3 <- svyglm(alc_eval_num ~    alc_water_claim_respond, design= dat_we, family ="gaussian") # but getting a response is positively associated with the evaluation of the mayor 
  m2_4 <- svyglm(alc_eval_num ~   alc_water_solve_partial, design= dat_we, family ="gaussian") # especially if the mayor actually solved your problem
  m2_5 <- svyglm(alc_eval_num ~   alc_water_solve_complete, design= dat_we, family ="gaussian") # especially if the mayor actually solved your problem
  
  
  coefs <- c(coef(m2_1)[2],coef(m2_2)[2],coef(m2_3)[2],coef(m2_4)[2],coef(m2_5)[2])
  lower <- c(coef(m2_1)[2]-1.96*summary(m2_1)$coef[4],coef(m2_2)[2]-1.96*summary(m2_2)$coef[4],coef(m2_3)[2]-1.96*summary(m2_3)$coef[4],coef(m2_4)[2]-1.96*summary(m2_4)$coef[4],coef(m2_5)[2]-1.96*summary(m2_5)$coef[4])
  upper <- c(coef(m2_1)[2]+1.96*summary(m2_1)$coef[4],coef(m2_2)[2]+1.96*summary(m2_2)$coef[4],coef(m2_3)[2]+1.96*summary(m2_3)$coef[4],coef(m2_4)[2]+1.96*summary(m2_4)$coef[4],coef(m2_5)[2]+1.96*summary(m2_5)$coef[4])
  names <- c("Respondent has \n intermittent water \n (N=2302)", "Respondent made \n water-related appeal \n (N=2302)", "Mayor responded  \n to appeal \n (N=485)", "Mayor solved problem  \n at least partially \n (N = 486)", "Mayor completely  \n solved problem \n (N=486)")
  coefs_all <- tibble(coefs, lower, upper, names)
  coefs_all$names <- factor(coefs_all$names,
                            levels =  c("Respondent has \n intermittent water \n (N=2302)", "Respondent made \n water-related appeal \n (N=2302)", "Mayor responded  \n to appeal \n (N=485)", "Mayor solved problem  \n at least partially \n (N = 486)", "Mayor completely  \n solved problem \n (N=486)"))
  
  coefs_all$names <- factor(coefs_all$names,
                            levels =c("Mayor completely  \n solved problem \n (N=486)","Mayor solved problem  \n at least partially \n (N = 486)","Mayor responded  \n to appeal \n (N=485)","Respondent made \n water-related appeal \n (N=2302)","Respondent has \n intermittent water \n (N=2302)"))
                            
  
  p_coefs <- ggplot(data= coefs_all)+ 
    geom_point(aes(x= names, y = coefs))+
     geom_errorbar(aes(x= names, ymin=lower, ymax=upper))+ 
    coord_flip() +
    theme_classic()+
      theme(axis.text = element_text(size=15))+
    theme(axis.title= element_text(size=15))+
    theme(title= element_text(size=18))+
    geom_hline(aes(yintercept= 0), color= "grey50", linetype = "dotted")+
    labs(title = "Evaluation of\n Incumbent Mayor", y = "Correlation Coefficient", x="", caption = "95% confidence intervals")
  
  ggsave(p_coefs, file = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/coefs_response.jpg", width =7)

  m2_1 <- svyglm(alc_eval_num ~ intermittent, design = dat_we, family ="gaussian") # in general, places that have intermittent water evaluate their mayor less favorably 
  m2_2 <- svyglm(alc_eval_num ~ intermittent + claim_water, design= dat_we, family ="gaussian") # having made a claim is negatively associated with the evaluation of the mayor
  m2_3 <- svyglm(alc_eval_num ~  intermittent  +  alc_water_claim_respond, design= dat_we, family ="gaussian") # but getting a response is positively associated with the evaluation of the mayor 
  m2_4 <- svyglm(alc_eval_num ~  intermittent  +  alc_water_solve_partial, design= dat_we, family ="gaussian") # especially if the mayor actually solved your problem
  m2_5 <- svyglm(alc_eval_num ~  intermittent  +  alc_water_solve_complete, design= dat_we, family ="gaussian") # especially if the mayor actually solved your problem
  #m2_6 <- svyglm(alc_eval_num ~   alc_water_claim_respond + alc_respond_binary, design= dat_we,family ="gaussian") # 
  
  stargazer(m2_1, m2_2, m2_3, m2_4,m2_5,
            covariate.labels =c("Respondent has \\\\ intermittent water", "Respondent made water-related \\\\ appeal", "Mayor responded \\\\ to appeal", "Mayor solved problem \\\\ at least partially", "Mayor solved \\\\ problem completely"),
            dep.var.caption = "Evaluation of mayor",
            dep.var.labels.include = FALSE,
            title= "Receiving a positive response to a water-appeal is associated with higher evaluations of the mayor",
            label = "tab:eval",
            out = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_spring_2022/response_eval.tex")
  
  # copartisan
  m2_1_1 <- svyglm(alc_eval_num ~ intermittent+copartisan, design = dat_we, family ="gaussian") # in general, places that have intermittent water evaluate their mayor less favorably 
  m2_2_1 <- svyglm(alc_eval_num ~  claim_water +copartisan, design= dat_we, family ="gaussian") # having made a claim is negatively associated with the evaluation of the mayor
  m2_3_1 <- svyglm(alc_eval_num ~   alc_water_claim_respond+copartisan, design= dat_we, family ="gaussian") # but getting a response is positively associated with the evaluation of the mayor 
  m2_4_1 <- svyglm(alc_eval_num ~   alc_water_solve_partial +copartisan, design= dat_we, family ="gaussian") # especially if the mayor actually solved your problem
  m2_5_1 <- svyglm(alc_eval_num ~  alc_water_solve_complete+copartisan, design= dat_we, family ="gaussian") # especially if the mayor actually solved your problem
  #m2_6 <- svyglm(alc_eval_num ~   alc_water_claim_respond + alc_respond_binary, design= dat_we,family ="gaussian") # 
  
  
  coefs_party <- c(coef(m2_1_1)[2],coef(m2_2_1)[2],coef(m2_3_1)[2],coef(m2_4_1)[2],coef(m2_5_1)[2])
  lower_party <- c(coef(m2_1_1)[2]-1.96*summary(m2_1_1)$coef[5],coef(m2_2_1)[2]-1.96*summary(m2_2_1)$coef[5],coef(m2_3_1)[2]-1.96*summary(m2_3_1)$coef[5],coef(m2_4_1)[2]-1.96*summary(m2_4_1)$coef[5],coef(m2_5_1)[2]-1.96*summary(m2_5_1)$coef[5])
  upper_party <- c(coef(m2_1_1)[2]+1.96*summary(m2_1_1)$coef[5],coef(m2_2_1)[2]+1.96*summary(m2_2_1)$coef[5],coef(m2_3_1)[2]+1.96*summary(m2_3_1)$coef[5],coef(m2_4_1)[2]+1.96*summary(m2_4_1)$coef[5],coef(m2_5_1)[2]+1.96*summary(m2_5_1)$coef[5])
  names_party <- c("Respondent has \n intermittent water \n (N=2302)", "Respondent made \n water-related appeal \n (N=2302)", "Mayor responded  \n to appeal \n (N=485)", "Mayor solved problem  \n at least partially \n (N = 486)", "Mayor completely  \n solved problem \n (N=486)")
  coefs_all_party <- tibble(coefs_party, lower_party, upper_party, names_party)
  coefs_all_party$names_party <- factor(coefs_all_party$names_party,
                            levels =  c("Respondent has \n intermittent water \n (N=2302)", "Respondent made \n water-related appeal \n (N=2302)", "Mayor responded  \n to appeal \n (N=485)", "Mayor solved problem  \n at least partially \n (N = 486)", "Mayor completely  \n solved problem \n (N=486)"))
  
  coefs_all_party$names_party <- factor(coefs_all_party$names_party,
                            levels =c("Mayor completely  \n solved problem \n (N=486)","Mayor solved problem  \n at least partially \n (N = 486)","Mayor responded  \n to appeal \n (N=485)","Respondent made \n water-related appeal \n (N=2302)","Respondent has \n intermittent water \n (N=2302)"))
  
  
  p_coefs_party <- ggplot(data= coefs_all_party)+ 
    geom_point(aes(x= names_party, y = coefs_party))+
    geom_errorbar(aes(x= names_party, ymin=lower_party, ymax=upper_party))+ 
    coord_flip() +
    theme_classic()+
    theme(axis.text = element_text(size=15))+
    theme(axis.title= element_text(size=15))+
    theme(title= element_text(size=18))+
    geom_hline(aes(yintercept= 0), color= "grey50", linetype = "dotted")+
    labs(title = "Evaluation of\n Incumbent Mayor \n (With Controls for Copartisanship)", y = "Correlation Coefficient", x="", caption = "95% confidence interval")
  
  ggsave(p_coefs_party, file = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/coefs_response_party.jpg", width = 8)
  
  
  
  stargazer(m2_1_1, m2_2_1, m2_3_1, m2_4_1,m2_5_1,type="text",
            covariate.labels =c("Respondent has \\\\ intermittent water", "Respondent made water-related \\\\ appeal", "Mayor responded \\\\ to appeal", "Mayor solved problem \\\\ at least partially", "Mayor solved \\\\ problem completely", "Respondent is \\\\ copartisan"),
            dep.var.caption = "Evaluation of mayor",
            dep.var.labels.include = FALSE,
            title= "Mayoral Evaluations controlling for individual copartisanship",
            label = "tab:eval_party",
            notes= "Includes individual controls for party affiliation included",
            out = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_spring_2022/appendix_response_eval_party.tex")
  
  
  m2_1_2 <- svyglm(alc_eval_num ~ intermittent+factor(cve_alc), design = dat_we, family ="gaussian") # in general, places that have intermittent water evaluate their mayor less favorably 
  m2_2_2 <- svyglm(alc_eval_num ~ intermittent + claim_water +factor(cve_alc), design= dat_we, family ="gaussian") # having made a claim is negatively associated with the evaluation of the mayor
  m2_3_2 <- svyglm(alc_eval_num ~  intermittent  +  alc_water_claim_respond+factor(cve_alc), design= dat_we, family ="gaussian") # but getting a response is positively associated with the evaluation of the mayor 
  m2_4_2 <- svyglm(alc_eval_num ~  intermittent  +  alc_water_solve_partial +factor(cve_alc), design= dat_we, family ="gaussian") # especially if the mayor actually solved your problem
  m2_5_2 <- svyglm(alc_eval_num ~  intermittent  +  alc_water_solve_complete+factor(cve_alc), design= dat_we, family ="gaussian") # especially if the mayor actually solved your problem
   
  stargazer(m2_1_2, m2_2_2, m2_3_2, m2_4_2,m2_5_2,type ="text",
            keep = c("intermittent", "claim_water", "alc_water_claim_respond", "alc_water_solve_partial", "alc_water_solve_complete"),
            covariate.labels =c("Respondent has \\\\ intermittent water", "Respondent made water-related \\\\ appeal", "Mayor responded \\\\ to appeal", "Mayor solved problem \\\\ at least partially", "Mayor solved \\\\ problem completely"),
            dep.var.caption = "Evaluation of mayor",
            dep.var.labels.include = FALSE,
            title= "Mayoral Evaluations with Alcaldia Fixed Effects",
            notes= "Alcaldia fixed effects",
            out = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_spring_2022/appendix_response_eval_alc.tex",
            label = "tab:eval_alc")
  
  m2_1_3 <- svyglm(alc_eval_num ~ intermittent+factor(cve_alc)+party, design = dat_we, family ="gaussian") # in general, places that have intermittent water evaluate their mayor less favorably 
  m2_2_3 <- svyglm(alc_eval_num ~ intermittent + claim_water +factor(cve_alc)+party, design= dat_we, family ="gaussian") # having made a claim is negatively associated with the evaluation of the mayor
  m2_3_3 <- svyglm(alc_eval_num ~  intermittent  +  alc_water_claim_respond+factor(cve_alc) +party, design= dat_we, family ="gaussian") # but getting a response is positively associated with the evaluation of the mayor 
  m2_4_3 <- svyglm(alc_eval_num ~  intermittent  +  alc_water_solve_partial +factor(cve_alc)+party, design= dat_we, family ="gaussian") # especially if the mayor actually solved your problem
  m2_5_3 <- svyglm(alc_eval_num ~  intermittent  +  alc_water_solve_complete+factor(cve_alc)+party, design= dat_we, family ="gaussian") # especially if the mayor actually solved your problem

  stargazer(m2_1_3, m2_2_3, m2_3_3, m2_4_3,m2_5_3,type ="text",
            keep = c("intermittent", "claim_water", "alc_water_claim_respond", "alc_water_solve_partial", "alc_water_solve_complete"),
            covariate.labels =c("Respondent has \\\\ intermittent water", "Respondent made water-related \\\\ appeal", "Mayor responded \\\\ to appeal", "Mayor solved problem \\\\ at least partially", "Mayor solved \\\\ problem completely"),
            dep.var.caption = "Evaluation of mayor",
            dep.var.labels.include = FALSE,
            title= "Mayoral evaluations with alcaldia and party fixed effects",
            out = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_spring_2022/appendix_response_eval_party_alc.tex",
            notes = "Alcaldia fixed effects and party affiliation controls",
            label = "tab:eval_both")
  
  
  # For the appendix: Responsiveness
  m3_1 <- svyglm(alc_eval_num ~   alc_respond_binary, design= dat_we,family ="gaussian") # 
  m3_2 <- svyglm(alc_eval_num ~    alc_respond_binary+ alc_water_claim_respond, design= dat_we,family ="gaussian") # 
  m3_3 <- svyglm(alc_respond_binary ~   alc_water_claim_respond, design= dat_we,family ="quasibinomial") # 
  stargazer(m3_1, m3_2, m3_3,
            covariate.labels =c("Believes mayor responds to citizen requests", "Received response from mayor"),
            dep.var.labels = c("Mayoral evaluation", "Believes mayor is responsive"),
            model.names = FALSE, 
            title= "Responsiveness",
            label ="tab:resp",
            out ="/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_spring_2022/appendix_responsiveness.tex")

  
  # reasons for evaluation 
  dat$elec_alc_eval_follow_clean <- factor(dat$elec_alc_eval_follow,
                                           levels = c("Otro", "Por (no) cumplir sus promesas", "Por la atención ciudadana",
                                                      "Por la calidad de los servicios públicos en la alcaldía", "Por la obras públicas que (no) realizó",
                                                      "Por la respuesta a COVID-19", "Por los beneficios sociales que (no) proporcionó"),
                                           labels = c("Other", "Keeping or \n not keeping \n their promises", "Citizen attention", "Public works \n or lack of", 
                                                      "Public service \n quality","Covid-19 response", "Social benefits" ))
  rm(list=setdiff(ls(), c("dat", "dat_we")))
  ########################
  # Looking at mechanisms 
  ########################
  # Clarity of responsibility is greater for water trucks than for intermittency -- if you receive water trucks
  dat$alc_responsible_binary_grid <- ifelse(dat$sp_resp_water =="La alcaldía"| dat$sp_resp_water =="La alcaldía (Acepta también delegación)",1,0 )
  dat$alc_responsible_binary_trucks <- ifelse(dat$sp_resp_trucks =="La alcaldía"| dat$sp_resp_trucks =="La alcaldía (Acepta también delegación)",1,0 )
  
  service_cert <- dat %>% 
    as_survey(weights = c("design_weight", "ps_weights")) %>% 
    filter(alc_responsible_binary_grid==1) %>% 
    group_by(truck_any) %>% 
    summarize(outcome= survey_mean(as.numeric(sp_uncert_water_1), na.rm=TRUE, vartype = "ci", level= .95)) %>% mutate(var= "Water grid service")
  trucks_cert <- dat %>% 
    as_survey(weights = c("design_weight", "ps_weights")) %>% 
    filter(alc_responsible_binary_grid ==1) %>% 
    group_by(truck_any) %>% 
    summarize(outcome= survey_mean(as.numeric(sp_uncert_trucks_1), na.rm=TRUE, vartype = "ci", level = .95)) %>% mutate(var= "Water tanker trucks")
  
  out_cert <- bind_rows(service_cert,trucks_cert) 
  
  p_attribution <- ggplot(out_cert)+ 
    geom_point(aes(x= var, y = outcome, group = factor(truck_any), color=factor(truck_any), shape = factor(truck_any)), position =position_dodge(width=.4), size= 2.5) + 
    geom_errorbar(aes(x= var, ymin=outcome_low, max=outcome_upp, group = factor(truck_any), color = factor(truck_any)), position =position_dodge(width=.4), width=.2) + 
    theme_bw()+
    scale_shape_manual(name = "Receives Tanker Trucks", values=c(6,1)) +
    scale_colour_grey(name = "Receives Tanker Trucks", start= .5, end=0)+
    labs(x= "", y = "Certainty (1-10)",
         title = "How certain are you that the mayor's office  \n is responsible for each service?", 
         color = "Receives water trucks", caption ="95% confidence intervals")+
    theme(legend.position = "bottom")
  
  ggsave(p_attribution, file = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/attribution.jpg")
  rm(list=setdiff(ls(), c("dat", "dat_we")))
  
  
#######
  # Alternative Explanations: Partisanship 

  # Are copartisans more likely to have continuous water?
  m5_1 <- svyglm(formula = intermittent ~ copartisan, design= dat_we, family ="quasibinomial")
  # Are copartisans less likely to experience outages?
  m5_2 <- svyglm(formula = outage_yesno ~ copartisan, design= dat_we, family ="quasibinomial")
  # Are copartisans more or less likely to make claims?
  m5_3 <- svyglm(formula = claim_water ~ copartisan, design= dat_we, family ="quasibinomial")
  # Are copartisans more or less likely to have their claims answered?
  m5_4 <- svyglm(formula = alc_water_claim_respond ~ copartisan, design= dat_we, family ="quasibinomial")
  
  
  coefs <- c(coef(m5_1)[2],coef(m5_2)[2],coef(m5_3)[2],coef(m5_4)[2])
  lower <- c(coef(m5_1)[2]-1.96*summary(m5_1)$coef[4],coef(m5_2)[2]-1.96*summary(m5_2)$coef[4],coef(m5_3)[2]-1.96*summary(m5_3)$coef[4],coef(m5_4)[2]-1.96*summary(m5_4)$coef[4])
  upper <- c(coef(m5_1)[2]+1.96*summary(m5_1)$coef[4],coef(m5_2)[2]+1.96*summary(m5_2)$coef[4],coef(m5_3)[2]+1.96*summary(m5_3)$coef[4],coef(m5_4)[2]+1.96*summary(m5_4)$coef[4])
  names <- c("Experiencing Intermittency", "Experiencing an Outage", "Making Water Related Appeal", "Receiving a Response to appeal")
  coefs_all <- tibble(coefs, lower, upper, names)
  coefs_all$names <- factor(coefs_all$names,
                            levels =  c("Experiencing Intermittency", "Experiencing an Outage", "Making Water Related Appeal", "Receiving a Response to appeal"),
                            labels =  c("Experiencing \n Intermittency", "Experiencing \n an Outage", "Making \n Water-Related\n Appeal", "Receiving a\n Response \nto appeal"))

  
  p_coefs_copartisanship <- ggplot(data= coefs_all)+ 
    geom_point(aes(x= names, y = coefs))+
    geom_errorbar(aes(x= names, ymin=lower, ymax=upper))+ 
    theme_classic()+
    theme(axis.text = element_text(size=12))+
    theme(axis.title= element_text(size=15))+
    theme(title= element_text(size=18))+
    geom_hline(aes(yintercept= 0), color= "grey50", linetype = "dotted")+
    labs(title = "Individual Copartisanship", y = "Correlation Coefficient", x="", caption = "95% confidence intervals")
  
  
#should also try neighborhoood's partisan status (which i should have from survey blocking)
  #note that this analysis only uses in person because i don't have the seccion for the online sample
  # load in vote share data from 2015-2018
  load("chapter_5_rewarding_responsiveness/additional_data/vote_shares_15_18.Rdata")
  elections <- elections[elections$cve_alc %in% c("05", "06", "07","11","17"),]
  elections$cve_alc <- str_pad(elections$cve_alc,3, "left", "0")
  elections <- left_join(elections, unique(dat[,c("cve_alc", "alcaldia")]))
  elections$vs_incumbent <- case_when(
    elections$alcaldia == "Gustavo A Madero"| elections$alcaldia == "Iztacalco"|elections$alcaldia == "Iztapalapa" |elections$alcaldia == "Tláhuac" ~ elections$vsmorena_jd_2018,
    elections$alcaldia=="Venustiano Carranza" ~ elections$vsprd_jd_2018,
    TRUE ~ NA_real_
  )
  # load data for just in person survey 
  load( "citizen_public_service_survey/survey_inperson.Rdata")
  ip <- left_join(ip, elections, by = "cve_secc")
  ip_we <- ip %>% 
    as_survey(weights = c("weight"))
  

  
  m6_1 <- svyglm(formula = intermittent ~ vs_incumbent, design= ip_we, family ="quasibinomial")
  # Are people from copartisan neighborhoods less likely to experience outages?
  m6_2 <- svyglm(formula = outage_yesno ~ vs_incumbent, design= ip_we, family ="quasibinomial")
  # Are people from copartisan neighborhoods more or less likely to make claims?
  m6_3 <- svyglm(formula = claim_water ~ vs_incumbent, design= ip_we, family ="quasibinomial")
  # Are people from copartisan neighborhoods more or less likely to have their claims answered?
  m6_4 <- svyglm(formula = alc_water_claim_respond ~ vs_incumbent, design= ip_we, family ="quasibinomial")
  
  coefs_c <- c(coef(m6_1)[2],coef(m6_2)[2],coef(m6_3)[2],coef(m6_4)[2])
  lower_c <- c(coef(m6_1)[2]-1.96*summary(m6_1)$coef[4],coef(m6_2)[2]-1.96*summary(m6_2)$coef[4],coef(m6_3)[2]-1.96*summary(m6_3)$coef[4],coef(m6_4)[2]-1.96*summary(m6_4)$coef[4])
  upper_c <- c(coef(m6_1)[2]+1.96*summary(m6_1)$coef[4],coef(m6_2)[2]+1.96*summary(m6_2)$coef[4],coef(m6_3)[2]+1.96*summary(m6_3)$coef[4],coef(m6_4)[2]+1.96*summary(m6_4)$coef[4])
  names_c <- c("Experiencing Intermittency", "Experiencing an Outage", "Making Water Related Appeal", "Receiving a Response to appeal")
  coefs_all_c <- tibble(coefs_c, lower_c, upper_c, names_c)
  coefs_all_c$names_c <- factor(coefs_all_c$names_c,
                            levels =  c("Experiencing Intermittency", "Experiencing an Outage", "Making Water Related Appeal", "Receiving a Response to appeal"),
                            labels =  c("Experiencing \n Intermittency", "Experiencing \n an Outage", "Making \n Water-Related\n Appeal", "Receiving a\n Response \nto appeal"))
  
  
  p_coefs_alignment <- ggplot(data= coefs_all_c)+ 
    geom_point(aes(x= names_c, y = coefs_c))+
    geom_errorbar(aes(x= names_c, ymin=lower_c, ymax=upper_c))+ 
    theme_classic()+
    theme(axis.text = element_text(size=12))+
    theme(axis.title= element_text(size=15))+
    theme(title= element_text(size=18))+
    geom_hline(aes(yintercept= 0), color= "grey50", linetype = "dotted")+
    labs(title = "Neighborhood Partisan Alignment", y = "Correlation Coefficient", x="", caption ="")
  
  
 copartisanship_plot <-  grid.arrange(p_coefs_alignment,p_coefs_copartisanship,nrow=1)
  
  ggsave(copartisanship_plot, file = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/coefs_copartisanship.jpg", width = 12, title= "Relationship between partisan alignment and water-related outcomes")
  
     